import read
import parse
import json

# Saved HTML pages (under ./output) used as parser inputs.
# Order matters: the script body below selects an entry by index.
local_web_filepaths = [
    # Zhihu article
    "./output/从负债6万，到靠抖店一月赚18万，这个副业今年一定要做！！ - 知乎.html",
    # Biquge (beqege.com) novel pages
    "./output/深空彼岸_深空彼岸全文阅读_辰东_笔趣阁_beqege.com.html",
    "./output/太古第一仙_太古第一仙最新章节_风青阳_笔趣阁_beqege.com.html",
    # Gushiwen classical-poetry page
    "./output/江南原文、翻译及赏析、拼音版及朗读_汉乐府古诗_古诗文网.html",
    # ttkbootstrap tutorial page
    "./output/教程 - ttkbootstrap.html",
    # Zhihu question
    "./output/有哪些看似不起眼却月入几千的小生意或者兼职？ - 知乎.html",
]


def main():
    """Parse one saved page and print the nodes matched by a hard-coded XPath.

    Reads the last entry of ``local_web_filepaths`` with ``read.file``, wraps
    the text in ``parse.Parse``, and prints whatever ``getElements`` returns
    for the XPath below. Returns nothing.
    """
    # The last entry is the Zhihu question page (judging by its filename);
    # presumably that is the page the active XPath below was written for.
    spider_text = read.file(local_web_filepaths[-1])

    etree_parse = parse.Parse(spider_text)

    # Reference XPaths for each kind of saved page:
    #   Zhihu article   //*[@id='root']/div/main/div/article/div[1]/div/div/div/p
    #   Zhihu question  //*[@id='root']/div/main/div/div/div[3]/div[1]/div/div[2]/div/div/div/div[2]/span[1]/div/div/span/p
    #   Gushiwen poem   //*[@id='contsonef9cd9ba44bb']
    #   Biquge index    //*[@id='list']/dl/dd/a
    #   ttkbootstrap    /html/body/div[3]/main/div/div[3]/article
    # NOTE(review): the active XPath is not identical to the "Zhihu question"
    # reference above (div[3]/div/div/div[3]/... vs div[3]/div[1]/div/div[2]/...)
    # -- confirm which one is intended.
    xpath = "//*[@id='root']/div/main/div/div/div[3]/div/div/div[3]/div/div/div/div[2]/span[1]/div/div/span/p"
    elements = etree_parse.getElements(xpath)

    print(elements)

    # Disabled follow-up step, kept for reference: run parse.loop_node on each
    # matched element and dump each collected result as JSON.
    # results = []
    # for element in elements:
    #     results.append(parse.loop_node(element, ["code"], 2)[1])
    # for result in results:
    #     print(json.dumps(result, default=str, ensure_ascii=False))


if __name__ == '__main__':
    main()